#Replication code for:
#Competing for Transparency: Political Competition and Institutional Reform in Mexican States
#DANIEL BERLINER, AARON ERLICH 
#American Political Science Review , Volume 109 , Issue 01 , February 2015, pp 110 - 128

#Analysis carried out using R version 3.1.2 (2014-10-31), Apple x86_64, darwin13.4.0


setwd("INSERT WORKING DIRECTORY HERE")


library(MASS)
library(survival)
library(lubridate)
library(zoo)

d<-read.csv("BerlinerErlich2015_finaldata.csv", header=T)


#VARIABLE DESCRIPTIONS

#CODE: ISO 3166-2:MX codes for Mexican provinces
#DATE: Date 
#WEEK: Week, denoted by date of first day of each week
#MONTH: Month, denoted by first day of each month
#LAW_APPROVE: Indicator for day state ATI law was approved by legislature
#LAW_PUBLISH: Indicator for day state ATI law was published by governor
#already_app: Indicator for days after approval (to mark observations for removal)
#already_pub: Indicator for days after publication (to mark observations for removal)
#legmajdistance: Legislative majority distance: largest party seat share - 50%
#govwindistance: Gubernatorial win distance: winning candidate vote share - 50%
#LEG_DIFF_office: Difference in seat share between largest and second largest parties
#GOV_DIFF_office: Difference in vote share between winning and 2nd place gov. candidates
#ENP: Effective number of parties
#D_unified_majority: Indicator for unified majority government (governor's party also has leg majority)
#D_unified_plurality: Indicator for unified plurality government (governor's party also has leg plurality)
#D_divided: Divided government
#LegMaj_None_office: Indicator for cases where no party has legislative majority
#LegMaj_PRI_office: Indicator for PRI legislative majority
#LegMaj_PAN_office: Indicator for PAN legislative majority
#LegMaj_PRDc_office: Indicator for PRD (including cases of coalition with PT) legislative majority
#GOV_PAN: PAN governor
#GOV_PRI: PRI governor
#GOV_PANPRDc: PAN-PRD coalition governor
#GOV_PRD: PRD governor
#leg_gov_cats: Set of categories to capture very observed combination of legislative majority and gubernatorial party control
#lameduck: Indicator for days after legislative or gubernatorial election but before newly elected politicians take office
#LEG_OFFICE: Indicator for date new legislators take office
#LegMajority_office: Indicator for whether any party holds a majority in state leg
#PARTY1_or_tie_office: Set of categories for which party or coalition had legislative plurality (even if no majority) -- includes separate category for ties.
#logGDP_lag: Log total GDP, lagged one year
#logPOP_lag: Log total population, lagged one year
#GDP_GRTH_lag: GDP growth, lagged one year
#avgEduc_lag: Average years of education, lagged one year
#ICBG_lin_lag: Corruption, linearly interpolated, lagged one year
#ICBG_avg_lag: Corruption, 2001/2003 average for each state
#ICBG_rep: Corruption, repeat last observed value for years not available
#logSchools: Log number of universities providing graduate training
#logCivSoc: Log number of civil society organizations in 2001 (missing for DIF)
#logNews: Log number of newspapers in 2001 (missing for DIF)
#knn_app: Neighboring state adoption (legislative approval), 8 nearest neighbors by capital city distance
#knn_pub: Neighboring state adoption (governor publication), 8 nearest neighbors by capital city distance






d$DATE<-ymd(d$DATE)

#subset for time "at risk", starting March 22, 2001 (when GUA law first submitted to state leg)
d1<-d[d$DATE>=mdy("March 22, 2001"),]
#number of days since then
d1$DAYS<-rep(1, nrow(d1))
d1$DAYS<-as.numeric(unlist(tapply(d1$DAYS, d1$CODE, cumsum)))

#remove observations after adoption for each state
d2<-d1[d1$already_pub==0,] #d2 uses LAW_PUBLISH as the DV
d1<-d1[d1$already_app==0,] #d1 uses LAW_APPROVE as the DV

#prepare data for coxph models
coxdata<-d1
coxdata$start<-coxdata$DAYS-1
coxdata$stop<-coxdata$start+1


#MODELS IN TABLE 2

#model 1: majority distance
cox1 <- coxph(Surv(start, stop, LAW_APPROVE) ~
legmajdistance+govwindistance+
LegMaj_None_office+LegMaj_PRI_office+LegMaj_PAN_office+
GOV_PAN+GOV_PRI+GOV_PANPRDc+
lameduck+
logGDP_lag+logPOP_lag+GDP_GRTH_lag+avgEduc_lag+
ICBG_lin_lag+
knn_app + cluster(CODE), data=coxdata)
summary(cox1)


#model 2: margin of victory
cox2 <- coxph(Surv(start, stop, LAW_APPROVE) ~
LEG_DIFF_office+GOV_DIFF_office+
LegMaj_None_office+LegMaj_PRI_office+LegMaj_PAN_office+
GOV_PAN+GOV_PRI+GOV_PANPRDc+
lameduck+
logGDP_lag+logPOP_lag+GDP_GRTH_lag+avgEduc_lag+
ICBG_lin_lag+
knn_app + cluster(CODE), data=coxdata)
summary(cox2)

#model 3: effective number of parties
cox3 <- coxph(Surv(start, stop, LAW_APPROVE) ~
ENP+
LegMaj_None_office+LegMaj_PRI_office+LegMaj_PAN_office+
GOV_PAN+GOV_PRI+GOV_PANPRDc+
lameduck+
logGDP_lag+logPOP_lag+GDP_GRTH_lag+avgEduc_lag+
ICBG_lin_lag+
knn_app + cluster(CODE), data=coxdata)
summary(cox3)

#model 4: divided gov
cox4 <- coxph(Surv(start, stop, LAW_APPROVE) ~
D_unified_majority+
D_divided+
LegMaj_None_office+LegMaj_PRI_office+LegMaj_PAN_office+
GOV_PAN+GOV_PRI+GOV_PANPRDc+
lameduck+
logGDP_lag+logPOP_lag+GDP_GRTH_lag+avgEduc_lag+
ICBG_lin_lag+
knn_app + cluster(CODE), data=coxdata)
summary(cox4)

#model 5: demand factors
cox5 <- coxph(Surv(start, stop, LAW_APPROVE) ~
legmajdistance+govwindistance+
LegMaj_None_office+LegMaj_PRI_office+LegMaj_PAN_office+
GOV_PAN+GOV_PRI+GOV_PANPRDc+
lameduck+
logGDP_lag+logPOP_lag+GDP_GRTH_lag+avgEduc_lag+
ICBG_lin_lag+
knn_app +
logSchools+logCivSoc+logNews+ cluster(CODE), data=coxdata)
summary(cox5)

#model 6: LAW_PUBLISH as DV
coxdata2<-d2
coxdata2$start<-coxdata2$DAYS-1
coxdata2$stop<-coxdata2$start+1

cox6 <- coxph(Surv(start, stop, LAW_PUBLISH) ~
legmajdistance+govwindistance+
LegMaj_None_office+LegMaj_PRI_office+LegMaj_PAN_office+
GOV_PAN+GOV_PRI+GOV_PANPRDc+
lameduck+
logGDP_lag+logPOP_lag+GDP_GRTH_lag+avgEduc_lag+
ICBG_lin_lag+
knn_pub + cluster(CODE), data=coxdata2)
summary(cox6)



#MODELS IN TABLE 3

#logit model with time, time^2, time ^3
logit1 <- glm(LAW_APPROVE ~
legmajdistance+govwindistance+
LegMaj_None_office+LegMaj_PRI_office+LegMaj_PAN_office+
GOV_PAN+GOV_PRI+GOV_PANPRDc+
lameduck+
logGDP_lag+logPOP_lag+GDP_GRTH_lag+avgEduc_lag+
ICBG_lin_lag+
knn_app + I(DAYS/10) + I((DAYS^2)/100) + I((DAYS^3)/1000)
, data=coxdata, family=binomial(link=logit))
summary(logit1)


#dummy variables for every leg X gov combination
cox_leggovcats <- coxph(Surv(start, stop, LAW_APPROVE) ~
legmajdistance+govwindistance+
I(leg_gov_cats=="lNOM_gPAN") +  #reference category is lPRI_gPRI
I(leg_gov_cats=="lNOM_gPPC") + 
I(leg_gov_cats=="lNOM_gPRD") + 
I(leg_gov_cats=="lNOM_gPRI") + 
I(leg_gov_cats=="lPAN_gPAN") + 
I(leg_gov_cats=="lPRD_gPRD") + 
I(leg_gov_cats=="lPRI_gPPC") + 
I(leg_gov_cats=="lPRI_gPRD") +lameduck+
logGDP_lag+logPOP_lag+GDP_GRTH_lag+avgEduc_lag+
ICBG_lin_lag+
knn_app + cluster(CODE), data=coxdata)
summary(cox_leggovcats)


#aggregate to weekly data
#for each week, use data from DAY 1 of week, for all vars except passage
uc<-as.character(unique(d1$CODE))
wdata<-NULL

for(i in 1:length(uc)){
	wdatacut<-d1[d1$CODE==uc[i],]
	wdatacut$WEEK<-as.character(wdatacut$WEEK)

	wdataTEMP<-as.data.frame(cbind(
	tapply(wdatacut$LAW_APPROVE, wdatacut$WEEK, function(x){as.numeric(sum(x)>0)}) ,
	tapply(wdatacut$legmajdistance, wdatacut$WEEK, function(x){x[1]}) ,
	tapply(wdatacut$govwindistance, wdatacut$WEEK, function(x){x[1]}) ,
	tapply(wdatacut$LegMaj_None_office, wdatacut$WEEK, function(x){x[1]}) ,
	tapply(wdatacut$LegMaj_PRI_office, wdatacut$WEEK, function(x){x[1]}) ,
	tapply(wdatacut$LegMaj_PAN_office, wdatacut$WEEK, function(x){x[1]}) ,
	tapply(wdatacut$GOV_PAN, wdatacut$WEEK, function(x){x[1]}) ,
	tapply(wdatacut$GOV_PRI, wdatacut$WEEK, function(x){x[1]}) ,
	tapply(wdatacut$GOV_PANPRDc, wdatacut$WEEK, function(x){x[1]}) ,
	tapply(wdatacut$lameduck, wdatacut$WEEK, function(x){x[1]}) ,
	tapply(wdatacut$logGDP_lag, wdatacut$WEEK, function(x){x[1]}) ,
	tapply(wdatacut$logPOP_lag, wdatacut$WEEK, function(x){x[1]}) ,
	tapply(wdatacut$GDP_GRTH_lag, wdatacut$WEEK, function(x){x[1]}) ,
	tapply(wdatacut$avgEduc_lag, wdatacut$WEEK, function(x){x[1]}) ,
	tapply(wdatacut$ICBG_lin_lag, wdatacut$WEEK, function(x){x[1]}) ,
	tapply(wdatacut$knn_app, wdatacut$WEEK, function(x){x[1]}) 	
	))
	colnames(wdataTEMP)<-c("LAW_APPROVE", "legmajdistance", "govwindistance", 
		"LegMaj_None_office", "LegMaj_PRI_office", "LegMaj_PAN_office", "GOV_PAN", "GOV_PRI", "GOV_PANPRDc",
		 "lameduck", "logGDP_lag", "logPOP_lag", "GDP_GRTH_lag", "avgEduc_lag", "ICBG_lin_lag", "knn_app")
	wdataTEMP$CODE<-tapply(wdatacut$CODE, wdatacut$WEEK, function(x){as.character(x[1])})
	wdata<-rbind(wdataTEMP, wdata)
}

wdata$CODE<-as.character(wdata$CODE)
wdata$WEEKS<-rep(1, nrow(wdata))
wdata$WEEKS<-as.numeric(unlist(tapply(wdata$WEEKS, wdata$CODE, cumsum)))
wdata$start<-wdata$WEEKS-1
wdata$stop<-wdata$start+1

wcox <- coxph(Surv(start, stop, LAW_APPROVE) ~
legmajdistance+govwindistance+
LegMaj_None_office+LegMaj_PRI_office+LegMaj_PAN_office+
GOV_PAN+GOV_PRI+GOV_PANPRDc+
lameduck+
logGDP_lag+logPOP_lag+GDP_GRTH_lag+avgEduc_lag+
ICBG_lin_lag+
knn_app + cluster(CODE), data=wdata)
summary(wcox)



#aggregate to monthly data
#for each month, use data from DAY 1 of month, for all vars except passage
uc<-as.character(unique(d1$CODE))
mdata<-NULL

for(i in 1:length(uc)){
	mdatacut<-d1[d1$CODE==uc[i],]
	mdatacut$MONTH<-as.character(mdatacut$MONTH)
	mdataTEMP<-as.data.frame(cbind(
	tapply(mdatacut$LAW_APPROVE, mdatacut$MONTH, function(x){as.numeric(sum(x)>0)}) ,
	tapply(mdatacut$legmajdistance, mdatacut$MONTH, function(x){x[1]}) ,
	tapply(mdatacut$govwindistance, mdatacut$MONTH, function(x){x[1]}) ,
	tapply(mdatacut$LegMaj_None_office, mdatacut$MONTH, function(x){x[1]}) ,
	tapply(mdatacut$LegMaj_PRI_office, mdatacut$MONTH, function(x){x[1]}) ,
	tapply(mdatacut$LegMaj_PAN_office, mdatacut$MONTH, function(x){x[1]}) ,
	tapply(mdatacut$GOV_PAN, mdatacut$MONTH, function(x){x[1]}) ,
	tapply(mdatacut$GOV_PRI, mdatacut$MONTH, function(x){x[1]}) ,
	tapply(mdatacut$GOV_PANPRDc, mdatacut$MONTH, function(x){x[1]}) ,
	tapply(mdatacut$lameduck, mdatacut$MONTH, function(x){x[1]}) ,
	tapply(mdatacut$logGDP_lag, mdatacut$MONTH, function(x){x[1]}) ,
	tapply(mdatacut$logPOP_lag, mdatacut$MONTH, function(x){x[1]}) ,
	tapply(mdatacut$GDP_GRTH_lag, mdatacut$MONTH, function(x){x[1]}) ,
	tapply(mdatacut$avgEduc_lag, mdatacut$MONTH, function(x){x[1]}) ,
	tapply(mdatacut$ICBG_lin_lag, mdatacut$MONTH, function(x){x[1]}) ,
	tapply(mdatacut$knn_app, mdatacut$MONTH, function(x){x[1]}) 	
	))
	colnames(mdataTEMP)<-c("LAW_APPROVE", "legmajdistance", "govwindistance", "LegMaj_None_office", "LegMaj_PRI_office", "LegMaj_PAN_office", 
			"GOV_PAN", "GOV_PRI", "GOV_PANPRDc", "lameduck", "logGDP_lag", "logPOP_lag", "GDP_GRTH_lag", "avgEduc_lag", "ICBG_lin_lag", "knn_app")
	mdataTEMP$CODE<-tapply(mdatacut$CODE, mdatacut$MONTH, function(x){as.character(x[1])})
	mdata<-rbind(mdataTEMP, mdata)
}

mdata$CODE<-as.character(mdata$CODE)
mdata$MONTHS<-rep(1, nrow(mdata))
mdata$MONTHS<-as.numeric(unlist(tapply(mdata$MONTHS, mdata$CODE, cumsum)))
mdata$start<-mdata$MONTHS-1
mdata$stop<-mdata$start+1

mcox <- coxph(Surv(start, stop, LAW_APPROVE) ~
legmajdistance+govwindistance+
LegMaj_None_office+LegMaj_PRI_office+LegMaj_PAN_office+
GOV_PAN+GOV_PRI+GOV_PANPRDc+
lameduck+
logGDP_lag+logPOP_lag+GDP_GRTH_lag+avgEduc_lag+
ICBG_lin_lag+
knn_app + cluster(CODE), data=mdata)
summary(mcox)


#alternate corruption measure #1
cox_corr_avg <- coxph(Surv(start, stop, LAW_APPROVE) ~
legmajdistance+govwindistance+
LegMaj_None_office+LegMaj_PRI_office+LegMaj_PAN_office+
GOV_PAN+GOV_PRI+GOV_PANPRDc+
lameduck+
logGDP_lag+logPOP_lag+GDP_GRTH_lag+avgEduc_lag+
ICBG_avg_lag+
knn_app + cluster(CODE), data=coxdata)
summary(cox_corr_avg)


#alternate corruption measure #2
cox_corr_rep <- coxph(Surv(start, stop, LAW_APPROVE) ~
legmajdistance+govwindistance+
LegMaj_None_office+LegMaj_PRI_office+LegMaj_PAN_office+
GOV_PAN+GOV_PRI+GOV_PANPRDc+
lameduck+
logGDP_lag+logPOP_lag+GDP_GRTH_lag+avgEduc_lag+
ICBG_rep+ #if use lag, then we lose 2001
knn_app + cluster(CODE), data=coxdata)
summary(cox_corr_rep)







#CREATE GRAPHICS

#make spells of party-legislative-periods
uc<-as.character(unique(d$CODE))
d$spell<-rep(NA,nrow(d))
for(i in 1:length(uc)){
	d$spell[d$CODE==uc[i]][1]<-i
}
d$spell[d$LEG_OFFICE==1]<-33:(32+117)
d$spell<-as.numeric(unlist(tapply(d$spell, d$CODE, na.locf, na.rm = FALSE)))

dayset<-seq(mdy("January 1, 2001"), mdy("March 1, 2007"), length.out=32)
allstates<-as.character(unique(d$CODE))
nstate<-length(allstates)

passdates1<-d[d$LAW_APPROVE==1,c("CODE","DATE")]
allstates_order<-as.character(passdates1$CODE[order(passdates1$DATE, decreasing=T)])

#COLOR PLOT
plot(dayset, 1:nstate, type="n", yaxt="n", xaxt="n", ylab="", xlab="Year", bty="n")
axis(side=2, at=1:nstate+0.5, labels=(substr(allstates_order,4,6)), las=2, tck=-0.02, cex=0.5)
axis(side=1, at=seq(mdy("January 1, 2001"), mdy("March 1, 2007"), by="years"), labels=2001:2007, las=1, tck=-0.02, cex=0.8)

for(i in 1:149){
	date1<-min(d$DATE[d$spell==i])
	date2<-max(d$DATE[d$spell==i])
	p1<-as.character(d$PARTY1_or_tie_office[d$spell==i][1])
	c_temp<-as.character(unique(d$CODE[d$spell==i]))
	y1<- which(allstates_order==c_temp)
	y2<- y1+1
	coltemp<-c("#FB6A4A","#FB6A4A", "#FB6A4A","#6BAED6","#FED976","#FED976")[which(c("PRI-PRD","PRI-PAN","PRI","PAN","PRD","PRD+PT")==p1)]
	polygon(x=c(date1,date2,date2,date1), y=c(y1,y1,y2,y2), col=coltemp, border=NA)
	if(p1=="PRI-PAN") polygon(x=c(date1,date2,date2,date1), y=c(y1,y1,y2,y2), col="#6BAED6", border=NA, density=40, angle=45, lwd=1)
	if(p1=="PRI-PRD") polygon(x=c(date1,date2,date2,date1), y=c(y1,y1,y2,y2), col="#FFEDA0", border=NA, density=40, angle=45, lwd=1)
}
segments(x0=rep(mdy("January 1, 2000"), nstate), x1=rep(mdy("March 1, 2008"), nstate), y0=1:nstate, y1=1:nstate, col="white", lwd=1.3)

for(i in 1:32){
	points(x=d$DATE[d$LAW_APPROVE==1 & d$CODE==uc[i]], y=which(allstates_order==uc[i])+0.5, pch=19, cex=0.8)
}


#BLACK AND WHITE PLOT 
plot(dayset, 1:nstate, type="n", yaxt="n", xaxt="n", ylab="", xlab="Year", bty="n")
axis(side=2, at=1:nstate+0.5, labels=(substr(allstates_order,4,6)), las=2, tck=-0.02, cex=0.5)
axis(side=1, at=seq(mdy("January 1, 2001"), mdy("March 1, 2007"), by="years"), labels=2001:2007, las=1, tck=-0.02, cex=0.8)

for(i in 1:149){
	date1<-min(d$DATE[d$spell==i])
	date2<-max(d$DATE[d$spell==i])
	p1<-as.character(d$PARTY1_or_tie_office[d$spell==i][1])
	c_temp<-as.character(unique(d$CODE[d$spell==i]))
	y1<- which(allstates_order==c_temp)
	y2<- y1+1
	coltemp<-c("gray92","gray92", "gray92","gray45","gray70","gray70")[which(c("PRI-PRD","PRI-PAN","PRI","PAN","PRD","PRD+PT")==p1)]
	polygon(x=c(date1,date2,date2,date1), y=c(y1,y1,y2,y2), col=coltemp, border=NA)
	if(p1=="PRI-PAN") polygon(x=c(date1,date2,date2,date1), y=c(y1,y1,y2,y2), col="gray45", border=NA, density=40, angle=45, lwd=1.5)
	if(p1=="PRI-PRD") polygon(x=c(date1,date2,date2,date1), y=c(y1,y1,y2,y2), col="gray70", border=NA, density=40, angle=45, lwd=1.5)
}
segments(x0=rep(mdy("January 1, 2000"), nstate), x1=rep(mdy("March 1, 2008"), nstate), y0=1:nstate, y1=1:nstate, col="white", lwd=1.3)

for(i in 1:32){
	points(x=d$DATE[d$LAW_APPROVE==1 & d$CODE==uc[i]], y=which(allstates_order==uc[i])+0.5, pch=19, cex=0.8)
}





#Simulated results for Hidalgo: Figure in appendix

#create real and alternate versions of Hidalgo
HID_1 <- coxdata[coxdata$CODE=="MX-HID",]
HID_2 <- HID_3 <- HID_1

#A counterfactual where Hidalgo PRI did worse in the 2002 leg elections and had only 52% of seats 
#But still were the majority in the legislature and held the governorship. 
HID_2$legmajdistance[HID_2$DATE>=mdy("April 1, 2002")] <- 2

#A counterfactual where Hidalgo PRI did worse in the 2005 leg elections and had only 52% of seats 
#But still were the majority in the legislature and held the governorship. 
HID_3$legmajdistance[HID_3$DATE>=mdy("April 1, 2005")] <- 2


fit1 <- survfit(cox1, newdata=HID_1, ylab="Survival", conf.type="none", id=HID_1$CODE) 
fit2 <- survfit(cox1, newdata=HID_2, ylab="Survival", conf.type="none", id=HID_1$CODE)
fit3 <- survfit(cox1, newdata=HID_3, ylab="Survival", conf.type="none", id=HID_1$CODE)

plot(fit1, xlab="Days", ylab="Survival", yaxt="n", mark.time=FALSE, lwd=2)
axis(2, at=c(0,0.25,.5,.75,1), labels = c("0", ".25", ".5", ".75" ,"1"), las=2)
lines(fit2, lty=3, mark.time=FALSE, lwd=2)
lines(fit3, lty=4,  mark.time=FALSE, lwd=2)
abline(h= c(.25,.5,.75),lty=2,col="grey60")








